## Replication File 1 of 2
## Cleans and preps data, outputs LL2024.csv
## File 2 inputs LL2024.csv for analyses
## "Reducing Prejudice Towards Refugees in Uganda: 
## Evidence that Social Networks Influence Attitudes"
## Jennifer M. Larson and Janet I. Lewis
## Created: Feb 2, 2022
## Updated: Feb 23, 2024

## Analyses performed in R version 4.3.1 (2023-06-16) -- "Beagle Scouts"

#install.packages("tidyverse")
#install.packages("igraph")
#install.packages("xtable")
#install.packages("stargazer")
#install.packages("plyr")

#library(plyr) 
library(tidyverse)
library(igraph)
#library(xtable)
#library(stargazer)


Data <- read_csv("LarsonLewis2024raw.csv") #521x95



##############################################################################
##############################################################################
## Clean and build outcome variables
##############################################################################
##############################################################################



## Add numeric variables for each of the attitude measures, scaled to all be
## increasing in pro-refugee sentiment.  That means some need to be flipped.

## There are six different questions. The most pro-refugee answer for each is:

#1) No problem (refugees_coming1, coming2, coming3), strongly agree [1] (agree high)
#2) Wouldn't fit (refugees_notfit1, notfit2, notfit3), strongly disagree [5] (agree low)
#3) Burden (refugees_burden1, burden2, burden3), strongly disagree [5] (agree low)
#4) Same values (refugees_values1, values2, values3), strongly agree [1] (agree high)
#5) Amount of land (ref_agricland, agricland2), increased a lot (1st factor) [1] (increased high)
#6) Threat (refugees_threat1, threat2, threat3), strongly disagree [5] (agree low)

## Rescale attitude questions 1-5 for  baseline1, baseline2, endline

## Convert to factors, use the appropriate levels to rescale:

levelsagreehigh <-  c("Strongly disagree",  
                      "Somewhat disagree", 
                      "Neither agree nor disagree",
                      "Somewhat agree",
                      "Strongly agree")
levelsagreelow <- c("Strongly agree",
                    "Somewhat agree",
                    "Neither agree nor disagree",
                    "Somewhat disagree",
                    "Strongly disagree")
levelsincreasedhigh <- c("decreased a lot",
                         "decreased a little",
                         "remain the same",
                         "increased a little",
                         "increased a lot")
levelslikelylow <- c("Very likely",
                     "Somewhat likely",
                     "Neither likely nor unlikely",
                     "Somewhat unlikely",
                     "Very unlikely")


Data$noprob1 <- as.numeric(factor(Data$refugees_coming2, levels = levelsagreehigh))
Data$noprob2 <- as.numeric(factor(Data$refugees_coming3, levels = levelsagreehigh))
Data$noprob_e <- as.numeric(factor(Data$refugees_coming2_e, levels = levelsagreehigh))

Data$fit1 <- as.numeric(factor(Data$refugees_notfit2, levels = levelsagreelow))
Data$fit2 <- as.numeric(factor(Data$refugees_notfit3, levels = levelsagreelow))
Data$fit_e <- as.numeric(factor(Data$refugees_notfit2_e, levels = levelsagreelow))

Data$burden1 <- as.numeric(factor(Data$refugees_burden2, levels = levelsagreelow))
Data$burden2 <- as.numeric(factor(Data$refugees_burden3, levels = levelsagreelow))
Data$burden_e <- as.numeric(factor(Data$refugees_burden2_e, levels = levelsagreelow))

Data$values1 <- as.numeric(factor(Data$refugees_values2, levels = levelsagreehigh))
Data$values2 <- as.numeric(factor(Data$refugees_values3, levels = levelsagreehigh))
Data$values_e <- as.numeric(factor(Data$refugees_values2_e, levels = levelsagreehigh))

Data$land1 <- as.numeric(factor(Data$ref_agricland, levels = levelsincreasedhigh))
Data$land2 <- as.numeric(factor(Data$ref_agricland2, levels = levelsincreasedhigh))
Data$land_e <- as.numeric(factor(Data$ref_agricland_e, levels = levelsincreasedhigh))

Data$threat1 <- as.numeric(factor(Data$refugees_threat2, levels = levelslikelylow))
Data$threat2 <- as.numeric(factor(Data$refugees_threat3, levels = levelslikelylow))
Data$threat_e <- as.numeric(factor(Data$refugees_threat2_e, levels = levelslikelylow))

## Index of pro-ref attitudes in baseline 1, baseline 2, and endline
Data$proref1 <- Data$noprob1+ Data$fit1 + Data$burden1 + Data$values1 + Data$land1 + Data$threat1
Data$proref2 <- Data$noprob2+ Data$fit2 + Data$burden2 + Data$values2 + Data$land2 + Data$threat2
Data$proref_e <- Data$noprob_e+ Data$fit_e + Data$burden_e + Data$values_e + Data$land_e + Data$threat_e


rm(levelsagreehigh)
rm(levelsagreelow)
rm(levelsincreasedhigh)
rm(levelslikelylow)

## Short-term change (baseline 2nd meas - baseline 1st)

Data$proref_st <- Data$proref2 - Data$proref1

## Long-term change (endline - baseline 1st):

Data$proref_lt <- Data$proref_e - Data$proref1


## Add a household number to data
Data$hhnum <- NA
Data$hhnum[which(Data$village_number ==1)] <- 1:sum(Data$village_number == 1)
Data$hhnum[which(Data$village_number ==2)] <- 1:sum(Data$village_number == 2)
Data$hhnum[which(Data$village_number ==3)] <- 1:sum(Data$village_number == 3)
Data$hhnum[which(Data$village_number ==4)] <- 1:sum(Data$village_number == 4)

## Make village-level datasets and treatment indices

## Create four separate village datasets
v1 <- Data[which(Data$village_number == 1),]
v2 <- Data[which(Data$village_number == 2),]
v3 <- Data[which(Data$village_number == 3),]
v4 <- Data[which(Data$village_number == 4),]

## Let's make indices for treated and control in each
t1 <- which(v1$treatment == 1)
t2 <- which(v2$treatment == 1)
t3 <- which(v3$treatment == 1)
t4 <- which(v4$treatment == 1)

c1 <- which(v1$treatment == 0)
c2 <- which(v2$treatment == 0)
c3 <- which(v3$treatment == 0)
c4 <- which(v4$treatment == 0)

## And treatment and control indices for the full dataset
t <- which(Data$treatment == 1)
c <- which(Data$treatment == 0)






##############################################################################
##############################################################################
## Construct networks from survey name questions, create network variables,
## merge into dataset
##############################################################################
##############################################################################


## Function to build edgelist from an ego variable and a list of alter variables:
make_el <- function(data, ego, alter_list){ #ego and alter_list both include the dataset too
  
  el <- matrix(data = NA, nrow = nrow(data)*length(alter_list), ncol = 2)
  colnames(el) <- c("ego", "alter")
  counter <- 1
  
  for(i in 1: nrow(data)){
    for(j in 1:length(alter_list)){
      el[counter,] <- c(ego[i], alter_list[[j]][i])
      counter <- counter + 1
    }
  }
  
  notempty <- which(el[,2] != "") ## Now clean it so alters of "" are listwise deleted.
  el <- el[notempty,]
  
  return(el)
}

## Function to convert individual-level edgelist to household-level

convert_to_hh <- function(hhlist, el){ #takes a hh reference list and an indvidual edgelist to make hh nw
  
  hhel <- matrix(data = NA, nrow = 0, ncol = 2)
  
  #For the ego and then alter, identify the household(s) they're in
  ## in terms of the hh number in the hh list
  ## (this assigns node numbers in order that hh appears in the data; preserves data ordering)
  for(i in 1:nrow(el)){
    
    hhego <- c()
    for(j in 1:length(hhlist)){
      if(sum(el[i,1] == hhlist[[j]])>0){
        hhego <- c(hhego, j)
      }
    }
    
    hhalter <- c()
    for(k in 1:length(hhlist)){
      if(sum(el[i,2] == hhlist[[k]])>0){
        hhalter <- c(hhalter, k)
      }
    }
    
    # If the ego or alter wasn't in any of the households in hhlist, go to the next row in el
    ## (Automatically closes this network)
    if(is.null(hhego)){
      next
    }
    if(is.null(hhalter)){
      next
    }
    
    #Now add all household links implied by all pairs of ego + alter to hhel
    
    for(l in 1:length(hhego)){
      for(m in 1:length(hhalter)){
        hhel <- rbind(hhel, c(hhego[l], hhalter[m]))
      }
    }
    
    #print(i)  
  }
  
  return(hhel)
  
}

## Test: make visit edgelist, convert to household

test <- make_el(v1, v1$ego, list(v1$visit1, v1$visit2, v1$visit3, v1$visit4, v1$visit5)) #326x2 matrix


familymat <- cbind(v1$ego, v1$hhh, v1$hh1, v1$hh2, v1$hh3, v1$hh4, v1$hh5,
                   v1$hh6, v1$hh7, v1$hh8, v1$hh9, v1$hh10, v1$hh11)


hhlist <- list()
hhcount <- c()
for(i in 1:length(v1$ego)){
  #Make sure to match to names, not blank strings
  names <- which(familymat[i,] != "")
  #Find the names of family members for 
  membs <- familymat[i,names]
  
  hhlist[[i]] <- unique(membs) #grab the unique names because ego and hhh might be repeated
  hhcount[i] <- length(unique(membs))
}

testhh <- convert_to_hh(hhlist, test)
## Confirmed

rm(test)
rm(testhh)
rm(familymat)
rm(hhlist)
rm(hhcount)

## Function to add attributes from our data to network objects
## Assumes node order is same as household order in data, 
## true here given the way the networks are built

addatts <- function(nw, data){
  
  V(nw)$treatment <- data$treatment
  V(nw)$proref1 <- data$proref1
  V(nw)$proref2 <- data$proref2
  V(nw)$proref_st <- data$proref_st
  V(nw)$proref_lt <- data$proref_lt
  V(nw)$wasrefugee <- ifelse(data$been_refugee == "Yes", 1, 0)
  V(nw)$proref_e <- data$proref_e
  V(nw)$threat1 <- data$threat1
  V(nw)$threat2 <- data$threat2
  V(nw)$threat_e <- data$threat_e
  V(nw)$converse <- data$converse
  V(nw)$occup <- data$resp_occupation
  V(nw)$gender <- data$gender
  V(nw)$religion <- data$religion
  V(nw)$age <- data$resp_age
  V(nw)$edu <- data$resp_educ_coarse
  V(nw)$language <-data$resp_language
  V(nw)$heardofsurvey <- data$vill_interview
  V(nw)$in_el <- data$in_el
  V(nw)$noprob1 <- data$noprob1
  V(nw)$noprob2 <- data$noprob2
  V(nw)$noprob_e <- data$noprob_e
  V(nw)$days_elapsed <- data$days_elapsed
  V(nw)$inperson_el <- data$survey_type_el
  
  out <- nw
  
  return(out)
  
}

## Now build household networks for all six networks, 
## plus the union of the four pre-treatment social networks.

makehhnws <- function(data, directed = FALSE, rmduplinks = TRUE, rmloops = TRUE){
  
  ## First grab the relevant variables for each network
  visitlist <- list(data$visit1, data$visit2, data$visit3, data$visit4, data$visit5) 
  meallist <- list(data$meal1, data$meal2, data$meal3, data$meal4, data$meal5)
  borrowlist <- list(data$borrow1, data$borrow2, data$borrow3, data$borrow4, data$borrow5)
  rumorlist <- list(data$rumor1, data$rumor2, data$rumor3, data$rumor4, data$rumor5)
  unionlist <- list(data$visit1, data$visit2, data$visit3, data$visit4, data$visit5, 
                    data$meal1, data$meal2, data$meal3, data$meal4, data$meal5, 
                    data$borrow1, data$borrow2, data$borrow3, data$borrow4, data$borrow5,
                    data$rumor1, data$rumor2, data$rumor3, data$rumor4, data$rumor5)
  willtelllist <- list(data$willtell1, data$willtell2, data$willtell3, data$willtell4, data$willtell5)
  spokereflist <- list(data$spokeref1, data$spokeref2, data$spokeref3, data$spokeref4, data$spokeref5,
                       data$spokeref6, data$spokeref7, data$spokeref8, data$spokeref9, data$spokeref10)
  
  
  ## Now make the edgelists, individual
  visit <- make_el(data = data, ego = data$ego, alter_list = visitlist) #326
  meal <- make_el(data = data, ego = data$ego, alter_list = meallist) #219
  borrow <- make_el(data = data, ego = data$ego, alter_list = borrowlist) #147
  rumor <- make_el(data = data, ego = data$ego, alter_list = rumorlist) #143
  union <- make_el(data = data, ego = data$ego, alter_list = unionlist) #4437
  willtell <- make_el(data = data, ego = data$ego, alter_list = willtelllist) #176
  spokeref <- make_el(data = data, ego = data$ego, alter_list = spokereflist)#768
  
  ## Now convert the edgelists to household
  
  ## First make a matrix of household members,
  familymat <- cbind(data$ego, data$hhh, data$hh1, data$hh2, data$hh3, data$hh4, data$hh5,
                     data$hh6, data$hh7, data$hh8, data$hh9, data$hh10, data$hh11)
  
  ## Now construct a list of household members from this matrix
  
  hhlist <- list()
  hhcount <- c()
  
  for(i in 1:length(data$ego)){
    #Make sure to match to names, not blank strings
    names <- which(familymat[i,] != "")
    #Find the names of family members for 
    membs <- familymat[i,names]
    
    hhlist[[i]] <- unique(membs) #grab the unique names because ego and hhh might be repeated
    hhcount[i] <- length(unique(membs))
  }
  
  ## Now convert each edgelist to household
  visithh <- convert_to_hh(hhlist, visit)
  mealhh <- convert_to_hh(hhlist, meal)
  borrowhh <- convert_to_hh(hhlist, borrow)
  rumorhh <- convert_to_hh(hhlist, rumor)
  unionhh <- convert_to_hh(hhlist, union)
  willtellhh <- convert_to_hh(hhlist, willtell)
  spokerefhh <- convert_to_hh(hhlist, spokeref)
  
  if(rmduplinks == TRUE){
    
    if(directed == TRUE){
      visithh <- unique(visithh, MARGIN = 1)
      mealhh <- unique(mealhh, MARGIN = 1)
      borrowhh <- unique(borrowhh, MARGIN = 1)
      rumorhh <- unique(rumorhh, MARGIN = 1)
      unionhh <- unique(unionhh, MARGIN = 1)
      willtellhh <- unique(willtellhh, MARGIN = 1)
      spokerefhh <- unique(spokerefhh, MARGIN = 1)
    }
    
    ## Removing duplicate links from undirected network requires first ordering
    ## the egos and alters and then taking the unique
    ## Sorting each row of nxm returns mxn, so take transpose to return to nxm
    
    if(directed == FALSE){
      visithh <- t(apply(visithh, MARGIN = 1, sort))
      visithh <- unique(visithh, MARGIN = 1)
      mealhh <- t(apply(mealhh, MARGIN = 1, sort))
      mealhh <- unique(mealhh, MARGIN = 1)
      borrowhh <- t(apply(borrowhh, MARGIN = 1, sort))
      borrowhh <- unique(borrowhh, MARGIN = 1)
      rumorhh <- t(apply(rumorhh, MARGIN = 1, sort))
      rumorhh <- unique(rumorhh, MARGIN = 1)
      unionhh <- t(apply(unionhh, MARGIN = 1, sort))
      unionhh <- unique(unionhh, MARGIN = 1)
      willtellhh <- t(apply(willtellhh, MARGIN = 1, sort))
      willtellhh <- unique(willtellhh, MARGIN = 1)
      spokerefhh <- t(apply(spokerefhh, MARGIN = 1, sort))
      spokerefhh <- unique(spokerefhh, MARGIN = 1)
    }
    
  }
  
  ## also deal with and count the loops
  
  if(rmloops == TRUE){
    vloops <- which(visithh[,1] == visithh[,2])
    if(length(vloops)>0){
      visithh <- visithh[-(vloops),]
    }
    
    mloops <- which(mealhh[,1] == mealhh[,2])
    if(length(mloops)>0){
      mealhh <- mealhh[-(mloops),]
    }
    
    bloops <- which(borrowhh[,1] == borrowhh[,2])
    if(length(bloops)>0){
      borrowhh <- borrowhh[-(bloops),]
    }
    
    rloops <- which(rumorhh[,1] == rumorhh[,2])
    if(length(rloops)>0){
      rumorhh <- rumorhh[-(rloops),]
    }
    
    uloops <- which(unionhh[,1] == unionhh[,2])
    if(length(uloops)>0){
      unionhh <- unionhh[-(uloops),]
    }
    
    wloops <- which(willtellhh[,1] == willtellhh[,2])
    if(length(wloops)>0){
      willtellhh <- willtellhh[-(wloops),]
    }
    
    sloops <- which(spokerefhh[,1] == spokerefhh[,2])
    if(length(sloops)>0){
      spokerefhh <- spokerefhh[-(sloops),]
    }
    
  }
  
  ## Now build the networks
  visitnw <- graph_from_edgelist(visithh, directed = directed)
  mealnw <- graph_from_edgelist(mealhh, directed = directed)
  borrownw <- graph_from_edgelist(borrowhh, directed = directed)
  rumornw <- graph_from_edgelist(rumorhh, directed = directed)
  unionnw <- graph_from_edgelist(unionhh, directed = directed)
  willtellnw <- graph_from_edgelist(willtellhh, directed = directed)
  spokerefnw <- graph_from_edgelist(spokerefhh, directed = directed)
  
  
  ## Add singletons that have a node index higher than the last included
  ## as a party to a link in the el
  ## (igraph fills in intermediate singletons, but not those that happen to
  ## be at the end of the node list)
  
  if(vcount(visitnw) < length(data$ego)){
    lastone <- max(V(visitnw)) #find the last vertex added
    numtoadd <- length(data$ego) - lastone #figure out how many are missing after that
    visitnw <- add_vertices(visitnw, numtoadd) #add these-- they'll be added as higher numbers
  }
  
  if(vcount(mealnw) < length(data$ego)){
    lastone <- max(V(mealnw)) #find the last vertex added
    numtoadd <- length(data$ego) - lastone #figure out how many are missing after that
    mealnw <- add_vertices(mealnw, numtoadd) #add these-- the'll be added as higher numbers
  }
  
  if(vcount(borrownw) < length(data$ego)){
    lastone <- max(V(borrownw)) #find the last vertex added
    numtoadd <- length(data$ego) - lastone #figure out how many are missing after that
    borrownw <- add_vertices(borrownw, numtoadd) #add these-- the'll be added as higher numbers
  }
  
  if(vcount(rumornw) < length(data$ego)){
    lastone <- max(V(rumornw)) #find the last vertex added
    numtoadd <- length(data$ego) - lastone #figure out how many are missing after that
    rumornw <- add_vertices(rumornw, numtoadd) #add these-- the'll be added as higher numbers
  }
  
  if(vcount(unionnw) < length(data$ego)){
    lastone <- max(V(unionnw)) #find the last vertex added
    numtoadd <- length(data$ego) - lastone #figure out how many are missing after that
    unionnw <- add_vertices(unionnw, numtoadd) #add these-- the'll be added as higher numbers
  }
  
  if(vcount(willtellnw) < length(data$ego)){
    lastone <- max(V(willtellnw)) #find the last vertex added
    numtoadd <- length(data$ego) - lastone #figure out how many are missing after that
    willtellnw <- add_vertices(willtellnw, numtoadd) #add these-- the'll be added as higher numbers
  }
  
  if(vcount(spokerefnw) < length(data$ego)){
    lastone <- max(V(spokerefnw)) #find the last vertex added
    numtoadd <- length(data$ego) - lastone #figure out how many are missing after that
    spokerefnw <- add_vertices(spokerefnw, numtoadd) #add these-- the'll be added as higher numbers
  }
  
  ## Add name and household number and household count attributes
  
  V(visitnw)$hhnum <- 1:vcount(visitnw)
  V(visitnw)$hhnum_check <- data$hhnum #will let us verify node order matches data order
  V(visitnw)$name <- data$ego
  V(visitnw)$hhcount <- hhcount
  
  V(mealnw)$hhnum <- 1:vcount(mealnw)
  V(mealnw)$hhnum_check <- data$hhnum
  V(mealnw)$name <- data$ego
  V(mealnw)$hhcount <- hhcount
  
  V(borrownw)$hhnum <- 1:vcount(borrownw)
  V(borrownw)$hhnum_check <- data$hhnum
  V(borrownw)$name <- data$ego
  V(borrownw)$hhcount <- hhcount
  
  V(rumornw)$hhnum <- 1:vcount(rumornw)
  V(rumornw)$hhnum_check <- data$hhnum
  V(rumornw)$name <- data$ego
  V(rumornw)$hhcount <- hhcount
  
  V(unionnw)$hhnum <- 1:vcount(unionnw)
  V(unionnw)$hhnum_check <- data$hhnum
  V(unionnw)$name <- data$ego
  V(unionnw)$hhcount <- hhcount
  
  V(willtellnw)$hhnum <- 1:vcount(willtellnw)
  V(willtellnw)$hhnum_check <- data$hhnum
  V(willtellnw)$name <- data$ego
  V(willtellnw)$hhcount <- hhcount
  
  V(spokerefnw)$hhnum <- 1:vcount(spokerefnw)
  V(spokerefnw)$hhnum_check <- data$hhnum
  V(spokerefnw)$name <- data$ego
  V(spokerefnw)$hhcount <- hhcount
  
  ## Add other attributes in addatts function
  
  visitnw <- addatts(visitnw, data = data)
  mealnw <- addatts(mealnw, data = data)
  borrownw <- addatts(borrownw, data = data)
  rumornw <- addatts(rumornw, data = data)
  unionnw <- addatts(unionnw, data = data)
  willtellnw <- addatts(willtellnw, data = data)
  spokerefnw <- addatts(spokerefnw, data = data)
  
  out <- list("visit" = visitnw, 
              "meal" = mealnw, 
              "borrow" = borrownw, 
              "rumor" = rumornw, 
              "union" = unionnw, 
              "willtell" = willtellnw, 
              "spokeref" = spokerefnw)
  
  return(out)
  
}

## Create the networks for all four villages
## Each is a list of seven networks (visit, meal, borrow, rumor, union, willtell, and spokeref)

v1n <- makehhnws(data = v1, directed = TRUE, rmduplinks = TRUE, rmloops = TRUE)
v2n <- makehhnws(data = v2, directed = TRUE, rmduplinks = TRUE, rmloops = TRUE)
v3n <- makehhnws(data = v3, directed = TRUE, rmduplinks = TRUE, rmloops = TRUE)
v4n <- makehhnws(data = v4, directed = TRUE, rmduplinks = TRUE, rmloops = TRUE)

## Pull out the union networks and rename for convenience:

g1 <- v1n$union
g2 <- v2n$union
g3 <- v3n$union
g4 <- v4n$union

## First use built-in check to make sure attributes were pulled from right place:
all.equal(V(g1)$hhnum, V(g1)$hhnum_check)
all.equal(V(g2)$hhnum, V(g2)$hhnum_check)
all.equal(V(g3)$hhnum, V(g3)$hhnum_check)
all.equal(V(g4)$hhnum, V(g4)$hhnum_check)

## Good, were made in order, hhnum will work for matching network attibutes back
## into the data.

vcount(g1) #127
vcount(g2) #98
vcount(g3) #146
vcount(g4) #150

ecount(g1) #301
ecount(g2) #324
ecount(g3) #858
ecount(g4) #539


## Next, calculate features of household network neighborhoods 
## in the union and the spoke to networks so we can add these as variables
## in our dataset

## Then add from the individual four pre-treatment networks to 

neighborhoodvars <- function(nw, vilnum = NA, names = c("neighbsize", "neighbtreat", "neighbproref")){
  # For all neighbors
  neighbsize <- c()
  neighbtreat <- c()
  neighbproref <- c()
  
  for (i in 1:vcount(nw)){
    # All neighbors:
    neighbors <- neighbors(nw, v = i, mode = "all") #neighbors does not include ego
    neighbsize[i] <- length(neighbors) #number of network neighbors
    treatments <- V(nw)$treatment[neighbors]
    neighbtreat[i] <- sum(treatments) #number of treated neighbors
    proref <- V(nw)$proref1[neighbors]
    neighbproref[i] <- mean(proref) #average baseline prorefugee score of neighbors
    
  }
  
  
  out <- cbind(neighbsize, neighbtreat,neighbproref)
  out <- as.data.frame(out)
  names(out) <- names
  out$ego <- V(nw)$name
  out$hhnum <- V(nw)$hhnum
  out$village_number <- rep(vilnum, vcount(nw))
  return(out)
}

g1n <- neighborhoodvars(g1, vilnum = 1)
g2n <- neighborhoodvars(g2, vilnum = 2)
g3n <- neighborhoodvars(g3, vilnum = 3)
g4n <- neighborhoodvars(g4, vilnum = 4)



## Now let's add these to the main dataset

neighbtoadd <- rbind(g1n, g2n, g3n, g4n)

## Because of polygamous households, ego + village_number won't 
## uniquely identify households.  Need to use hhnum.

dim(Data) #521x119
dim(neighbtoadd) #521x6,3 new + 3 matching vars

Data <- merge(Data, neighbtoadd, by = c("ego", "village_number", "hhnum"))


dim(Data) #521x122





## Tidy


rm(neighbtoadd)

rm(addatts, convert_to_hh, make_el, makehhnws, neighborhoodvars)
rm(names)



##############################################################################
##############################################################################
## Create and clean covariates to use in regression analyses
##############################################################################
##############################################################################



## First, add variables accounting for treated neighbors
## in the network.

## Proportion of neighbors treated:
Data$propneighbtreat <- Data$neighbtreat / Data$neighbsize


## Presence of one or more treated neighbors:
Data$someneighbtreat <- ifelse(Data$neighbtreat > 0, 1, 0)


## Coarsen religion variable

relig <- Data$religion
relig[which(relig %in% c("Anglican", "Pentacostal", "Other"))] <- "Other"
Data$relig <- factor(relig, levels = c("Other", "Muslim", "Catholic", "Protestant"))
rm(relig)

## Coarsen years_live variable
Data$morethan5 <- ifelse(Data$years_live == "More than 5 years", 1, 0)

## Now add individual network attributes including distance to reference
## categories:

newneighborhoodvars <- function(nw, vilnum = NA, extremenum = 3, names = c("disttocranky1", "disttocranky2",
                                                                           "disttomostneg", "disttopersuaded",
                                                                           "disttomostpos", 
                                                                           "disttocranky1b", "disttocranky2b",
                                                                           "disttomostnegb", "disttopersuadedb",
                                                                           "disttomostposb",
                                                                           "eigen", "betweenness", "closeness",
                                                                           "trans" )){
  # For all neighbors
  disttocranky1 <- c()
  disttocranky2 <- c()
  disttomostneg <- c()
  disttopersuaded <- c()
  disttomostpos <- c()
  disttocranky1b <- c()
  disttocranky2b <- c()
  disttomostnegb <- c()
  disttopersuadedb <- c()
  disttomostposb <- c()  
  
  
  for (i in 1:vcount(nw)){
    stloss <- sort(V(nw)$proref_st, decreasing = FALSE)[extremenum] #find cut for extremenum most neg
    cranky1 <- which(V(nw)$proref_st <= stloss) #nodes that went most negative
    holder <- c()
    for(j in 1:length(cranky1)){
      holder[j] <- distances(nw, v = i, to = cranky1[j], mode = "all")
    }
    disttocranky1[i] <- min(holder)
    cranky1withoutrefs <- holder[which(holder != 0)] #exclude 0 distance from options (the references need dist to a different reference)
    disttocranky1b[i] <- min(cranky1withoutrefs)
    
    stneg <- which(V(nw)$proref_st < 0)
    howneg <- V(nw)$proref2[stneg]
    stcut <- sort(howneg, decreasing = FALSE)[extremenum]
    cranky2 <- which(V(nw)$proref_st<0 & V(nw)$proref2 <= stcut) ## Which went negative, ending at most extreme value?
    holder2 <- c()
    for(j in 1:length(cranky2)){
      holder2[j] <- distances(nw, v = i, to = cranky2[j], mode = "all")
    }
    disttocranky2[i] <- min(holder2)
    cranky2withoutrefs <- holder2[which(holder2 != 0)]
    disttocranky2b[i] <- min(cranky2withoutrefs)
    
    mostnegval <- sort(V(nw)$proref1, decreasing = FALSE)[extremenum]
    mostneg <- which(V(nw)$proref1 <= mostnegval)
    holder3 <- c()
    for(j in 1:length(mostneg)){
      holder3[j] <- distances(nw, v = i, to = mostneg[j], mode = "all")
    }
    disttomostneg[i] <- min(holder3)
    mostnegwithoutrefs <- holder3[which(holder3 != 0)]
    disttomostnegb[i] <- min(mostnegwithoutrefs)
    
    stmostgain <- sort(V(nw)$proref_st, decreasing = TRUE)[extremenum]
    mostpersuaded <- which(V(nw)$proref_st >= stmostgain)
    holder4 <- c()
    for(j in 1:length(mostpersuaded)){
      holder4[j] <- distances(nw, v = i, to = mostpersuaded[j], mode = "all")
    }
    disttopersuaded[i] <- min(holder4)
    persuadedwithoutrefs <- holder4[which(holder4 != 0)]
    disttopersuadedb[i] <- min(persuadedwithoutrefs)
    
    mostpos <- sort(V(nw)$proref1, decreasing = TRUE)[extremenum]
    themostpos <- which(V(nw)$proref1 >= mostpos)
    holder5 <- c()
    for(j in 1:length(themostpos)){
      holder5[j] <- distances(nw, v = i, to = themostpos[j], mode = "all")
    }
    disttomostpos[i] <- min(holder5)
    mostposwithoutrefs <- holder5[which(holder5 != 0)]
    disttomostposb[i] <- min(mostposwithoutrefs)
    
  }
  
  eigen <- eigen_centrality(nw)$vector
  btw <- betweenness(nw, directed = FALSE, normalized = TRUE)
  #In some versions of igraph, closeness in tiny components is evaluated high. Want to restrict
  # to only members of the giant component.
  closeness <- closeness(nw, mode = "all", normalized = TRUE)
  osgiant <- which(components(nw)$membership != 1) #grab all the nodes outside the giant componnet
  closeness[osgiant] <- NA #replace their centrality score with NA
  trans <- transitivity(nw, type = "local")
  
  ## Replace Infs in distance scores with NAs
  disttomostneg[which(disttomostneg == Inf)] <- NA
  disttomostpos[which(disttomostpos == Inf)] <- NA
  disttocranky1[which(disttocranky1 == Inf)] <- NA
  disttocranky2[which(disttocranky2 == Inf)] <- NA
  disttopersuaded[which(disttopersuaded == Inf)] <- NA
  disttomostnegb[which(disttomostnegb == Inf)] <- NA
  disttomostposb[which(disttomostposb == Inf)] <- NA
  disttocranky1b[which(disttocranky1b == Inf)] <- NA
  disttocranky2b[which(disttocranky2b == Inf)] <- NA
  disttopersuadedb[which(disttopersuadedb == Inf)] <- NA
  
  out <- cbind(disttocranky1, disttocranky2, disttomostneg, disttopersuaded, disttomostpos,
               disttocranky1b, disttocranky2b, disttomostnegb, disttopersuadedb, disttomostposb,
               eigen, btw, closeness, trans)
  
  out <- as.data.frame(out)
  names(out) <- names
  out$ego <- V(nw)$name
  out$hhnum <- V(nw)$hhnum
  out$village_number <- rep(vilnum, vcount(nw))
  return(out)
}

## Calculate these variables for all four village's networks

g1n <- newneighborhoodvars(g1, vilnum = 1)
g2n <- newneighborhoodvars(g2, vilnum = 2)
g3n <- newneighborhoodvars(g3, vilnum = 3)
g4n <- newneighborhoodvars(g4, vilnum = 4)

## Collect them into one matrix
newneighbtoadd <- rbind(g1n, g2n, g3n, g4n)

## And merge them into the dataset
dim(Data) #521x126
dim(newneighbtoadd) #521x17

Data <- merge(Data, newneighbtoadd, by = c("ego", "village_number", "hhnum"))

## Now remake village datasets

v1 <- Data[which(Data$village_number == 1),]
v2 <- Data[which(Data$village_number == 2),]
v3 <- Data[which(Data$village_number == 3),]
v4 <- Data[which(Data$village_number == 4),]

## And remake indices for treated and control in each 

t <- which(Data$treatment == 1)
c <- which(Data$treatment == 0)

t1 <- which(v1$treatment == 1)
t2 <- which(v2$treatment == 1)
t3 <- which(v3$treatment == 1)
t4 <- which(v4$treatment == 1)

c1 <- which(v1$treatment == 0)
c2 <- which(v2$treatment == 0)
c3 <- which(v3$treatment == 0)
c4 <- which(v4$treatment == 0)

rm(newneighbtoadd, newneighborhoodvars)



## Now add an indicator for being the reference group for the distance variables.

Data$mostpos <- ifelse(Data$disttomostpos == 0, 1, 0)
Data$mostneg <- ifelse(Data$disttomostneg == 0, 1, 0)
Data$mostpersuaded <- ifelse(Data$disttopersuaded == 0, 1, 0)
Data$mostcranky <- ifelse(Data$disttocranky2 == 0, 1, 0)






## Output the dataset as LL2024.csv

write.csv(Data, "LL2024.csv") #521 x 144













##############################################################################
##############################################################################
## Export node lists and edgelists to be imported into Gephi
## to build Paper Figure 8:
##############################################################################
##############################################################################



## Uncomment the following code to write edgelists and node attribute lists
## to csv files that can be imported into Gephi to make Figure 8

## Can see a less nicely formatted version of the networks in Figure 8 using
## R's plot function instead:

plot(as.undirected(g1))
plot(as.undirected(g2))
plot(as.undirected(g3))
plot(as.undirected(g4))

## Gephi requires edge and node lists be in a particular format to import correctly.
## These functions output the correct format.

## Grab edgelist from undirected, simplified version of the networks
## Collect households in terms of their household number, not the ego identifier

makegephiel <- function(nw){
  
  #make undirected
  nw <- as.undirected(nw)
  
  #make sure no duplicates or loops
  nw <- simplify(nw)
  
  #get the edgelist
  el <- get.edgelist(nw, names = FALSE) #grabs i.t.o. hhnum (the node id) rather than ego name
  
  #add the column of "undirected" for gephi
  out <- cbind(el, rep("undirected", nrow(el)))
  
  colnames(out) <- c("Source", "Target", "Type")
  
  return(out)
}

write.csv(makegephiel(g1), file = "el1.csv", row.names = FALSE)
write.csv(makegephiel(g2), file = "el2.csv", row.names = FALSE)
write.csv(makegephiel(g3), file = "el3.csv", row.names = FALSE)
write.csv(makegephiel(g4), file = "el4.csv", row.names = FALSE)

## Now make attribute files in terms of the household number.  Includes religion.

makegephiatt <- function(data){
  
  #Make dataset in correct format
  attds <- data[,c("hhnum", "hhnum", "ego", "religion")]
  
  #Add labels in correct format
  names(attds) <- c("Id", "Label", "ego", "Religion")
  
  return(attds)
}

write.csv(makegephiatt(v1), file = "att1.csv", row.names = FALSE)
write.csv(makegephiatt(v2), file = "att2.csv", row.names = FALSE)
write.csv(makegephiatt(v3), file = "att3.csv", row.names = FALSE)
write.csv(makegephiatt(v4), file = "att4.csv", row.names = FALSE)


## Number of isolates that should be in view for each village's network:

c(sum(degree(g1) == 0),
  sum(degree(g2) == 0),
  sum(degree(g3) == 0),
  sum(degree(g4) == 0)
)

#14, 3, 0, and 0


## Now for each village, import the node and link csv files into Gephi,
## size the nodes proportional to degree, and color by religion.
## Results in visualizations in Figure 8.







